import requests
import re
import json
import time
import pymysql
import re

"""
create table jd_comment(
    id BIGINT,
    content text,
    score double,
    productColor varchar(255),
    productSize varchar(255),
    location varchar(255),
    referenceTime varchar(255),
    nickname  varchar(255),
    referenceName varchar(255),
    PRIMARY KEY (id) 
);

"""

# HTTP headers sent with every comment request: a desktop Chrome user-agent
# and a cookie string.  The cookie is written one "name=value" pair per line
# and joined with "; " so individual entries can be read and diffed.
# NOTE(review): the cookie looks like a captured browser session and will
# presumably expire — confirm and refresh before running.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
    "cookie": "; ".join((
        "unpl=JF8EAJpnNSttXUoDAxoCGBQXQ1sHWwoOT0QCOGFSUQleHwADSVAaGxd7XlVdXxRKFB9vZhRUWFNJUg4eAisSEXteU11bD00VB2xXXAQDGhUQR09SWEBJJV9cXVgMSxEGaWcDZG1bS2QFGjIbGhVIXVNbWg1OJwJfYDVkbVhPUAwfAysTIEptFgoBDEIQAG9gSFRVXUhUAh4FHhcgSm1X",
        "__jdv=76161171|haosou-search|t_262767352_haosousearch|cpc|39245174717_0_40f70127697b4f75b0f7f4e7ee7cc086|1676011062148",
        "__jdu=1942164644",
        "areaId=14",
        "PCSYCityID=CN_340000_340100_0",
        "shshshfpa=3266f9f3-7184-3fd1-69d3-441931185f41-1676011064",
        "shshshfpx=3266f9f3-7184-3fd1-69d3-441931185f41-1676011064",
        "shshshfpb=oAE7J4Ntf4sRi53lmWg0YnA",
        "__jdc=122270672",
        "shshshfp=6a02fdc6e5b37300fbec7d923d22b597",
        "ip_cityCode=1116",
        "ipLoc-djd=14-1116-3431-57939",
        "jwotest_product=99",
        "jsavif=0",
        "__jda=122270672.1942164644.1676011061.1676011062.1676014923.2",
        "wlfstk_smdl=bkwlredztm4x51nbdpynsvna7nu4wak8",
        "token=59398f490ac78527fc4f997fd3e03043,2,931119",
        "__tk=dfb200db0d22436ccdd9ef9095f34af8,2,931119",
        "JSESSIONID=69E5661302D44F6CC02DA27716029256.s1",
        "__jdb=122270672.7.1942164644|2.1676014923",
        "shshshsID=6d56ca183c10b8593f16bdec2d16491d_5_1676015373525",
        "3AB9D23F7A4B3C9B=4W4ZKWYZMF7EYKIIV7RLTOM5YIKLDXX2TECGK72T6EVHXDMOGRAL7LM54OO53XPPIJPWTGVAJ6XAU6DBG66QGIQEQY",
    )),
}

# Matches the characters kept in a stored comment: CJK ideographs in
# U+4E00..U+9FA5 plus the full-width comma and full stop.  Compiled once here
# instead of being looked up for every comment.
_CONTENT_FILTER = re.compile('[\u4e00-\u9fa5，。]')

# Seconds to wait for the comment endpoint before giving up on a request;
# without a timeout a stalled connection would block the script forever.
_REQUEST_TIMEOUT = 10


def _parse_jsonp(text):
    """Return the decoded JSON payload of a JSONP response body.

    The endpoint is called with ``callback=fetchJSON_comment98`` and so wraps
    its payload as ``fetchJSON_comment98({...});``.  The payload is taken as
    everything between the first ``(`` and the last ``)`` rather than at fixed
    offsets, so trailing whitespace or a different callback name cannot
    truncate it.

    Args:
        text: Raw response body.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If *text* has no parenthesised payload, or the payload is
            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    start = text.find("(")
    end = text.rfind(")")
    if start == -1 or end <= start:
        raise ValueError("response is not a JSONP document")
    return json.loads(text[start + 1:end])


def _clean_content(raw):
    """Turn newlines into "。" and drop every character the filter rejects."""
    return "".join(_CONTENT_FILTER.findall(raw.replace("\n", "。")))


# Open the database connection.
# NOTE(review): host and credentials are hard-coded; consider reading them
# from the environment or a config file.
# The charset is set explicitly so Chinese text does not depend on the
# driver's default connection encoding.
connect = pymysql.connect(host="master", port=3306, user="root", passwd="123456", db="bigdata",
                          charset="utf8mb4")

try:
    # The cursor is closed when the block exits; the connection itself is
    # closed in ``finally`` even if a request, parse or insert fails.
    with connect.cursor() as cursor:
        for page in range(10):
            # Pause before every request to avoid hammering the endpoint.
            time.sleep(2)

            # Comment-list endpoint for one page of product 100048276065.
            url = f"https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100048276065&score=0&sortType=5&page={page}&pageSize=10&isShadowSku=0&fold=1"

            # Send the request; fail on HTTP errors instead of trying to
            # parse an error page as JSON.
            response = requests.get(url=url, headers=headers, timeout=_REQUEST_TIMEOUT)
            response.raise_for_status()

            # Parse the JSONP body.
            json_obj = _parse_jsonp(response.text)

            # Store every comment on this page.
            for comment in json_obj["comments"]:
                comment_id = comment["id"]
                content = _clean_content(comment["content"])

                print(comment_id, content)

                # ``id`` and ``content`` are required; the remaining fields
                # are read with ``.get`` so a comment lacking one of them is
                # stored with NULL in that column instead of aborting the run.
                # REPLACE makes the insert idempotent on the primary key.
                cursor.execute(
                    "replace into jd_comment values(%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                    [
                        comment_id,
                        content,
                        comment.get("score"),
                        comment.get("productColor"),
                        comment.get("productSize"),
                        comment.get("location"),
                        comment.get("referenceTime"),
                        comment.get("nickname"),
                        comment.get("referenceName"),
                    ],
                )

            # One commit per page rather than per row: fewer round trips, and
            # each completed page is still persisted before the next request.
            connect.commit()
finally:
    connect.close()
